0.1 Libraries

library(rmarkdown)
library(httr)
library(data.table)
library(dplyr)
library(lubridate)
library(knitr)
library(kableExtra)
library(ggplot2)
library(stringr)
library(magrittr)
library(highcharter)
library(forecast)
library(xlsx)
library(fastDummies)
library(zoo)
library(prophet)
library(tidyr)
library(psych)
library(rsample) 
library(caret)
library(MLmetrics)
library(modelr)

1 Daten importieren und vorbereiten

1.1 Meteorologische Daten

fread("https://data.geo.admin.ch/ch.meteoschweiz.klima/nbcn-tageswerte/nbcn-daily_BAS_previous.csv", sep = ";", colClasses = c("character", "Date", rep("numeric", 10))) %>%
  mutate(
    timestamp = as.POSIXct(
      paste0(
        substr(date, 1, 4),
        "-",
        substr(date, 5, 6),
        "-",
        substr(date, 7, 8),
        " 00:00:00"),
      , format="%Y-%m-%d %H:%M:%S"
    )
  ) %>%
  mutate_at(c("gre000d0", "hto000d0", "nto000d0", "prestad0", "rre150d0", "sre000d0", "tre200d0", "tre200dn", "tre200dx", "ure200d0"), as.numeric) %>%
  bind_rows(
    # fread("test2.csv", sep = ";", colClasses = c("character", "Date", rep("numeric", 10))) %>%
    fread("https://data.geo.admin.ch/ch.meteoschweiz.klima/nbcn-tageswerte/nbcn-daily_BAS_current.csv", sep = ";", colClasses = c("character", "Date", rep("numeric", 10))) %>%
      mutate(
        timestamp = as.POSIXct(
          paste0(
            substr(date, 1, 4),
            "-",
            substr(date, 5, 6),
            "-",
            substr(date, 7, 8),
            " 00:00:00"),
          , format="%Y-%m-%d  %H:%M:%S", tz="Europe/Berlin"
        )
      ) %>%
      mutate_at(c("gre000d0", "hto000d0", "nto000d0", "prestad0", "rre150d0", "sre000d0", "tre200d0", "tre200dn", "tre200dx", "ure200d0"), as.numeric)
  ) %>%
  relocate(timestamp) %>%
  select(-c(date, `station/location`)) %>%
  mutate(
    year = as.numeric(substr(timestamp, 1, 4)),
    month = as.numeric(substr(timestamp, 6, 7)),
    day = as.numeric(substr(timestamp, 9, 10))
  ) %>%
  data.frame() %>%
  assign("meteo", ., inherits = TRUE)
meteo %>%
         mutate(Date=as.Date(timestamp)) %>%
           select(Date, max_temp = tre200dx, min_temp = tre200dn, mean_temp = tre200d0) %>%
           filter(year(Date)>1970) -> temp

highchart(type = "stock") %>% 
  hc_add_series(type = "line", 
                data = temp, 
                hcaes(Date, mean_temp), 
                color = "#B00000",
                tooltip = list(pointFormat = "Tagesmittel: {point.mean_temp:.1f} C"),
                size = 0.15) %>%
  hc_add_series(type = "line", 
                data = temp, 
                hcaes(Date, max_temp), 
                color = "#661200",
                tooltip = list(pointFormat = "Tagesmaximum: {point.max_temp:.1f} C"),
                size = 0.15) %>%
  hc_add_series(type = "line", 
                data = temp, 
                hcaes(Date, min_temp), 
                color = "#DC440E",
                tooltip = list(pointFormat = "Tagesminimum: {point.min_temp:.1f} C"),
                size = 0.15) %>%
  hc_rangeSelector(verticalAlign = "bottom",
                   selected = 1) %>%
  hc_xAxis(title = list(text = "")) %>%
  hc_yAxis(title = list(text = ""), opposite = F)

Figure 1.1: Lufttemperatur seit 1864.



1.2 Stromverbrauchs-Daten

httr::GET("https://data.bs.ch/explore/dataset/100233/download/?format=csv&timezone=Europe%2FBerlin")  %>%
  content(., "text") %>%
  fread(sep=";", colClasses = c(timestamp_interval_start_text = "character")) %>%
  select(timestamp = timestamp_interval_start_text, netzlast_kwh = stromverbrauch_kwh, grundversorgte_kunden_kwh, freie_kunden_kwh) %>%
  arrange(timestamp) %>%
  mutate(
    year = as.numeric(substr(timestamp, 1, 4)),
    month = as.numeric(substr(timestamp, 6, 7)),
    day = as.numeric(substr(timestamp, 9, 10))
  ) %>%
  group_by(year, month, day) %>%
  filter(year > 2011) %>%
  summarise(netzlast_kwh = sum(netzlast_kwh, na.rm = T),
            grundversorgte_kunden_kwh = sum(grundversorgte_kunden_kwh, na.rm = T),
            freie_kunden_kwh = sum(freie_kunden_kwh, na.rm = T)) %>%
  ungroup() %>%
  mutate(timestamp = as.POSIXct(
    paste0(year, "-", month, "-", day, " 00:00:00"), format="%Y-%m-%d  %H:%M:%S", tz="Europe/Berlin")
  ) %>%
  assign("strom_daily", ., inherits = TRUE)



highchart(type = "stock") %>% 
  hc_add_series(strom_daily %>%
         mutate(Date=as.Date(timestamp),
                netzlast_kwh = netzlast_kwh/1000000), 
         type = "line", 
         hcaes(Date, netzlast_kwh), 
         color = "#923F8D",
         tooltip = list(pointFormat = "Stromverbrauch: {point.netzlast_kwh:.2f} GWh"),
         size = 0.15) %>%
  hc_rangeSelector(verticalAlign = "bottom",
                   selected = 1) %>%
  hc_xAxis(title = list(text = "")) %>% 
  hc_yAxis(title = list(text = ""), opposite = F)

Figure 1.2: Täglicher Stromverbrauch seit 2012.



1.3 Feiertage, Ferien und Veranstaltungen

httr::GET("https://data.bs.ch/explore/dataset/100074/download/?format=csv&timezone=Europe%2FBerlin") %>%
  content(., "text") %>%
  fread(sep=";") %>%
  select(tag_datum, name, code, kategorie_name) %>%
  filter(name != "Fasnachtsmontag", name != "Fasnachtsmittwoch", name != "Dies Academicus") %>%
  filter(!(tag_datum == "2008-05-01 00:00:00" & name == "Tag der Arbeit")) %>% # Tag der Arbeit doubles with Auffahrt
  filter(kategorie_name %in% c("Feiertag", "Ferien") | code == "herbstm") %>%
  filter(name != "Semesterferien") %>%
  assign("rd_veranst", ., inherits = TRUE)

rd_veranst %>%
  data.frame() %>%
  mutate(Herbstmesse = if_else(code == "herbstm", "Herbstmesse", "")) %>%
  select(tag_datum, Herbstmesse) %>%
  filter(Herbstmesse != "") %>%
  full_join(
    rd_veranst %>%
      mutate(Feiertage = if_else(kategorie_name == "Feiertag", name, "")) %>%
      select(tag_datum, Feiertage) %>%
      filter(Feiertage != ""),
    by = "tag_datum"
  ) %>%
  full_join(
    rd_veranst %>%
      mutate(Ferien = if_else(kategorie_name == "Ferien", name, "")) %>%
      select(tag_datum, Ferien) %>%
      filter(Ferien != ""),
    by = "tag_datum"
  ) %>%
  mutate(timestamp = as.POSIXct(tag_datum, tz="Europe/Berlin")) %>%
  filter(year(timestamp) > 2011) %>%
  select(timestamp, Herbstmesse, Feiertage, Ferien) %>%
  mutate(
    year = lubridate::year(timestamp),
    month = lubridate::month(lubridate::floor_date(timestamp, "month")),
    day = lubridate::day(lubridate::floor_date(timestamp, "day"))
  ) %>%
  data.frame() %>%
  assign("Veranstaltungen", ., inherits = TRUE)

rm(events, rd_veranst)



1.4 Datensätze zusammenfügen

meteo %>%
  full_join(Veranstaltungen %>%
              select(-timestamp), by = c("year" = "year", "month" = "month", "day" = "day")) %>%
  full_join(strom_daily %>%
              select(-timestamp), by = c("year" = "year", "month" = "month", "day" = "day")) %>%
  mutate(weekday = lubridate::wday(timestamp, label = TRUE, abbr = TRUE, locale="German_Germany"),
         daytype = if_else(weekday %in% c("So", "Sa"), "Wochenende", "Werktage")) %>%
  mutate(Covid19_Lockdown = if_else(timestamp < as.POSIXct("2020-03-15", format="%Y-%m-%d"), 0, 
                                    if_else(timestamp > as.POSIXct("2020-05-10", format="%Y-%m-%d"), 0, 1))
  ) %>%
  relocate(timestamp, netzlast_kwh, grundversorgte_kunden_kwh, freie_kunden_kwh) %>%
  filter(year(timestamp) > 2011) %>%
  mutate(HGT = if_else(tre200d0 <= 12, 20-tre200d0, 0)) %>%
  mutate(Herbstmesse = if_else(is.na(Herbstmesse), "No", Herbstmesse),
         Feiertage = if_else(is.na(Feiertage), "No", Feiertage),
         Ferien = if_else(is.na(Ferien), "No", Ferien),
         HGT_sq = HGT^2) %>%
  drop_na() %>%
  mutate(time = as.Date(timestamp, tz = 'Europe/Berlin')) %>%
  select(-timestamp) %>%
  relocate(time) %>%
  assign("Data", ., inherits = TRUE)

Data %>%
  mutate(time = as.Date(time),
         Feiertage_dummy = if_else(Feiertage == "No", 0, 1),
         Ferien_dummy = if_else(Ferien == "No", 0, 1),
         Herbstmesse_dummy = if_else(Herbstmesse == "No", 0, 1),
         Wochenende_dummy = if_else(daytype == "Werktage", 0, 1),
         Mo_dummy = if_else(weekday == "Mo", 1, 0),
         Di_dummy = if_else(weekday == "Di", 1, 0),
         Mi_dummy = if_else(weekday == "Mi", 1, 0),
         Do_dummy = if_else(weekday == "Do", 1, 0),
         Fr_dummy = if_else(weekday == "Fr", 1, 0),
         Sa_dummy = if_else(weekday == "Sa", 1, 0),
         So_dummy = if_else(weekday == "So", 1, 0),
         time_num = as.numeric(time),
         Neujahrstag_dummy = if_else(Feiertage == "Neujahrstag", 1, 0),
         Karfreitag_dummy = if_else(Feiertage == "Karfreitag", 1, 0),
         Ostersonntag_dummy = if_else(Feiertage == "Ostersonntag", 1, 0),
         Ostermontag_dummy = if_else(Feiertage == "Ostermontag", 1, 0),
         Tag_der_Arbeit_dummy = if_else(Feiertage == "Tag der Arbeit", 1, 0),
         Auffahrt_dummy = if_else(Feiertage == "Auffahrt", 1, 0),
         Pfingssonntag_dummy = if_else(Feiertage == "Pfingssonntag", 1, 0),
         Pfingstmontag_dummy = if_else(Feiertage == "Pfingstmontag", 1, 0),
         Bundesfeiertag_dummy = if_else(Feiertage == "Bundesfeiertag", 1, 0),
         Heiligabend_dummy = if_else(Feiertage == "Heiligabend", 1, 0),
         Weihnachten_dummy = if_else(Feiertage == "Weihnachten", 1, 0),
         Stephanstag_dummy = if_else(Feiertage == "Stephanstag", 1, 0),
         Silvester_dummy = if_else(Feiertage == "Silvester", 1, 0),
         Weihnachtsferien_dummy = if_else(Feiertage == "Weihnachtsferien", 1, 0),
         Fasnachtsferien_dummy = if_else(Feiertage == "Fasnachtsferien (Sportferien)", 1, 0),
         Osterferien_dummy = if_else(Feiertage == "Osterferien (Frühlingsferien)", 1, 0),
         Sommerferien_dummy = if_else(Feiertage == "Sommerferien", 1, 0),
         Herbstferien_dummy = if_else(Feiertage == "Herbstferien", 1, 0),
         tre200d0_sq = tre200d0^2
  ) %>%
  dplyr::select(-c(grundversorgte_kunden_kwh, freie_kunden_kwh, year, month, day, Herbstmesse, Feiertage, Ferien, daytype, weekday)) %>%
  mutate(
    Ettape = case_when(
      time < as.Date("2022-07-01") ~ "Train",
      time >= as.Date("2022-07-01") & time < as.Date("2023-05-01") ~ "Test",
      time >= as.Date("2023-05-01") ~ "Prognose"
    )
  ) %>%
  assign("Data_selec", ., inherits = TRUE)



2 Das Modell

set.seed(1234) 

Data_model <- Data_selec %>%
  filter(time < as.Date("2023-05-01")) %>%
  mutate(time_num = as.numeric(time)) %>%
  arrange(time)

# Data_prognose <- Data_selec %>%
#   filter(time >= as.Date("2023-05-01"))


tab_split <- initial_split(Data_model, prop = .7)
tab_train <- training(tab_split) 
tab_test  <- testing(tab_split) 



model_lm <- lm(netzlast_kwh ~ time_num + I(time_num^2) + Di_dummy + Mi_dummy + Do_dummy + Fr_dummy + Sa_dummy + So_dummy + gre000d0 + hto000d0 + 
                    # nto000d0 + prestad0 + 
                    rre150d0 + sre000d0 + tre200d0 + I(tre200d0^2) + 
                    # tre200dn + tre200dx + 
                    ure200d0 + Covid19_Lockdown + HGT + I(HGT^2) + Neujahrstag_dummy + Karfreitag_dummy + 
                    # Ostersonntag_dummy + 
                    Ostermontag_dummy + Tag_der_Arbeit_dummy + Auffahrt_dummy + 
                    # Pfingssonntag_dummy + 
                    Pfingstmontag_dummy + Bundesfeiertag_dummy + Heiligabend_dummy + Weihnachten_dummy + Stephanstag_dummy + Silvester_dummy + Ferien_dummy + 
                    time_num * Wochenende_dummy + 
                    Neujahrstag_dummy * Wochenende_dummy +
                    # Karfreitag_dummy * Wochenende_dummy +
                    Tag_der_Arbeit_dummy * Wochenende_dummy +
                    # Auffahrt_dummy * Wochenende_dummy +
                    Bundesfeiertag_dummy  * Wochenende_dummy +
                    Heiligabend_dummy * Wochenende_dummy +
                    Stephanstag_dummy * Wochenende_dummy, data = tab_train)


Data_selec %>%
  bind_cols(
    predict(model_lm, Data_selec, interval = "prediction")
  ) %>%
  select(time, fit, lwr, upr, netzlast_kwh) %>%
  mutate(
    Ettape = case_when(
      time < as.Date("2022-07-01") ~ "Train",
      time >= as.Date("2022-07-01") & time < as.Date("2023-05-01") ~ "Test",
      time >= as.Date("2023-05-01") ~ "Prognose"
    )
  ) %>%
  mutate(resid = netzlast_kwh - fit) %>%
  assign("Pred_lm", ., inherits = TRUE)

write.csv2(Pred_lm, "100245_Strom_Wetter.csv", row.names=F, na = "") 
  
Pred_lm %>%
  mutate(time_num = as.numeric(time)) %>%
  assign("Pred_lm", ., inherits = TRUE)



Ettape R2 RMSE MAE
Train 0.9503770 105297.29 79198.61
Test 0.9063178 94903.66 69022.56
Prognose 0.9414487 85902.75 66292.40




Pred_lm %>%
  filter(time >= as.Date("2023-07-01")) %>%
  mutate(netzlast_kwh = netzlast_kwh/1000000,
         fit = fit/1000000,
         lwr = lwr/1000000,
         upr = upr/1000000) -> recent

hchart(recent, "line", hcaes(time, netzlast_kwh), color = "#008AC3",
       tooltip = list(pointFormat = "Effektiver Stromverbrauch: {point.netzlast_kwh:.2f} GWh",
                      shared = TRUE),
       zIndex = 1) %>%
  hc_add_series(recent, "line", hcaes(time, fit), color = "#B375AB",
       tooltip = list(pointFormat = "Erwarteter Stromverbrauch: {point.fit:.2f} GWh",
                      shared = TRUE),
       zIndex = 2) %>%
  hc_add_series(recent, type = "arearange",
                hcaes(x = time, low = lwr, high = upr),
                zIndex = 0,
                color = "#E7CEE2",
                tooltip = list(pointFormat = "95% Konfidenzintervall: {point.lwr:.2f} - {point.upr:.2f} GWh"), shared = TRUE
                ) %>%
  hc_xAxis(title = list(text = "")) %>% 
  hc_yAxis(title = list(text = "")) %>%
  hc_plotOptions(series = list(marker = list(enabled = FALSE)))

Figure 2.1: Effektiver und erwarteter täglicher Stromverbrauch.




Hinweis: Je nach Kombination von Betriebssystemen und Versionen von RStudio, R und dem Paket prophet können die Ergebnisse leicht von den publizierten Resultaten abweichen. Grund dafür sind Unterschiede am Zufalls-Generator für die Prognose des Modells. Die angewendete Konfiguration lautet:


sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=German_Switzerland.utf8  LC_CTYPE=German_Switzerland.utf8   
## [3] LC_MONETARY=German_Switzerland.utf8 LC_NUMERIC=C                       
## [5] LC_TIME=German_Switzerland.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices datasets  utils     methods   base     
## 
## other attached packages:
##  [1] modelr_0.1.11     MLmetrics_1.1.1   caret_6.0-94      lattice_0.21-8   
##  [5] rsample_1.2.0     psych_2.3.6       tidyr_1.3.0       prophet_1.0      
##  [9] rlang_1.1.1       Rcpp_1.0.11       zoo_1.8-11        fastDummies_1.7.3
## [13] xlsx_0.6.5        forecast_8.21.1   highcharter_0.9.4 magrittr_2.0.3   
## [17] stringr_1.5.0     ggplot2_3.4.4     kableExtra_1.3.4  knitr_1.43       
## [21] lubridate_1.9.2   dplyr_1.1.3       data.table_1.14.8 httr_1.4.7       
## [25] rmarkdown_2.25   
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-163         xts_0.12.2           webshot_0.5.5       
##  [4] tools_4.2.2          backports_1.4.1      bslib_0.5.1         
##  [7] utf8_1.2.3           R6_2.5.1             rpart_4.1.19        
## [10] colorspace_2.1-0     nnet_7.3-19          withr_2.5.0         
## [13] tidyselect_1.2.0     mnormt_2.1.1         curl_5.0.2          
## [16] compiler_4.2.2       cli_3.6.1            rvest_1.0.3         
## [19] xml2_1.3.3           bookdown_0.35        sass_0.4.7          
## [22] tseries_0.10-54      scales_1.2.1         lmtest_0.9-40       
## [25] fracdiff_1.5-2       quadprog_1.5-8       systemfonts_1.0.4   
## [28] digest_0.6.33        svglite_2.1.1        pkgconfig_2.0.3     
## [31] htmltools_0.5.6      parallelly_1.36.0    highr_0.10          
## [34] fastmap_1.1.1        htmlwidgets_1.6.2    TTR_0.24.3          
## [37] rstudioapi_0.15.0    quantmod_0.4.25      jquerylib_0.1.4     
## [40] generics_0.1.3       jsonlite_1.8.7       ModelMetrics_1.2.2.2
## [43] rlist_0.4.6.2        Matrix_1.6-1         munsell_0.5.0       
## [46] fansi_1.0.4          lifecycle_1.0.3      furrr_0.3.1         
## [49] pROC_1.18.4          stringi_1.7.12       yaml_2.3.6          
## [52] MASS_7.3-60          recipes_1.0.8        plyr_1.8.8          
## [55] grid_4.2.2           parallel_4.2.2       listenv_0.9.0       
## [58] splines_4.2.2        xlsxjars_0.6.1       pillar_1.9.0        
## [61] igraph_1.5.1         stats4_4.2.2         reshape2_1.4.4      
## [64] future.apply_1.11.0  codetools_0.2-19     urca_1.3-3          
## [67] glue_1.6.2           evaluate_0.21        renv_1.0.3          
## [70] RcppParallel_5.1.7   foreach_1.5.2        vctrs_0.6.3         
## [73] gtable_0.3.4         purrr_1.0.2          future_1.33.0       
## [76] assertthat_0.2.1     cachem_1.0.8         gower_1.0.1         
## [79] xfun_0.39            prodlim_2023.08.28   broom_1.0.5         
## [82] survival_3.5-7       class_7.3-22         viridisLite_0.4.2   
## [85] timeDate_4022.108    tibble_3.2.1         iterators_1.0.14    
## [88] rJava_1.0-6          hardhat_1.3.0        lava_1.7.2.1        
## [91] timechange_0.2.0     globals_0.16.2       ellipsis_0.3.2      
## [94] ipred_0.9-14
renv::snapshot()
## - The lockfile is already up to date.